# Importation des bibliothèques
library(ggplot2)
library(ggalt)
## Registered S3 methods overwritten by 'ggalt':
## method from
## grid.draw.absoluteGrob ggplot2
## grobHeight.absoluteGrob ggplot2
## grobWidth.absoluteGrob ggplot2
## grobX.absoluteGrob ggplot2
## grobY.absoluteGrob ggplot2
library(ggExtra)
library(ggcorrplot)
library(plotly)
##
## Attachement du package : 'plotly'
## L'objet suivant est masqué depuis 'package:ggplot2':
##
## last_plot
## L'objet suivant est masqué depuis 'package:stats':
##
## filter
## L'objet suivant est masqué depuis 'package:graphics':
##
## layout
library(quantmod)
## Le chargement a nécessité le package : xts
## Le chargement a nécessité le package : zoo
##
## Attachement du package : 'zoo'
## Les objets suivants sont masqués depuis 'package:base':
##
## as.Date, as.Date.numeric
## Le chargement a nécessité le package : TTR
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggthemes)
library(waffle)
library(highcharter)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks plotly::filter(), stats::filter()
## x dplyr::first() masks xts::first()
## x dplyr::lag() masks stats::lag()
## x dplyr::last() masks xts::last()
library(ggfortify)
## Registered S3 method overwritten by 'ggfortify':
## method from
## fortify.table ggalt
# Nuages de points
# Make the black-and-white theme the default for the plots built below.
ggplot2::theme_set(ggplot2::theme_bw())
# Load the `midwest` dataset from the ggplot2 package and preview its
# first rows.
data(midwest, package = "ggplot2")
head(midwest, n = 6L)
## # A tibble: 6 x 28
## PID county state area poptotal popdensity popwhite popblack popamerindian
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int>
## 1 561 ADAMS IL 0.052 66090 1271. 63917 1702 98
## 2 562 ALEXAND~ IL 0.014 10626 759 7054 3496 19
## 3 563 BOND IL 0.022 14991 681. 14477 429 35
## 4 564 BOONE IL 0.017 30806 1812. 29344 127 46
## 5 565 BROWN IL 0.018 5836 324. 5264 547 14
## 6 566 BUREAU IL 0.05 35688 714. 35157 50 65
## # ... with 19 more variables: popasian <int>, popother <int>, percwhite <dbl>,
## # percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
## # popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
## # poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
## # percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## # percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Basic scatter plot: `area` on the x axis, `poptotal` on the y axis.
gg <- ggplot(midwest, aes(area, poptotal)) +
  geom_point()
gg

# Same scatter plot, with point colour mapped to `state` and point size
# mapped to `popdensity`.
gg <- ggplot(midwest, aes(area, poptotal)) +
  geom_point(mapping = aes(colour = state, size = popdensity))
gg

# Scatter plot with a linear-model trend line and its standard-error band.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned, which would silently change the meaning of `se = T`.
gg <- ggplot(data = midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "lm", se = TRUE)
gg
## `geom_smooth()` using formula 'y ~ x'

# Labelled scatter plot with a linear-model trend line, restricted to
# area in [0, 0.1] and population in [0, 500000].
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
gg <- ggplot(data = midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "lm", se = TRUE) +
  # NOTE: xlim()/ylim() discard out-of-range observations before the
  # smoother is computed (ggplot2 warns about the removed rows), so the
  # fitted line only reflects the points that remain.
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "surface vs pop",
    y = "population",
    x = "surface",
    title = "Nuage de points de la population en fonction de la surface du comté",
    caption = "Source : midwest dataset"
  )
gg
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

# Render the ggplot object `gg` as an interactive plotly figure.
plotly::ggplotly(p = gg)
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
#
# Keep the rows with 350000 < poptotal <= 500000 and 0.01 < area < 0.1,
# then print the resulting subset.
midwest_select <- midwest[
  with(
    midwest,
    poptotal > 350000 & poptotal <= 500000 & area > 0.01 & area < 0.1
  ),
]
midwest_select
## # A tibble: 6 x 28
## PID county state area poptotal popdensity popwhite popblack popamerindian
## <int> <chr> <chr> <dbl> <int> <dbl> <int> <int> <int>
## 1 659 WILL IL 0.05 357313 7146. 303420 38361 692
## 2 707 LAKE IN 0.03 475594 15853. 334203 116688 865
## 3 1221 GENESEE MI 0.037 430459 11634. 336651 84257 3132
## 4 2056 LUCAS OH 0.021 462361 22017. 380155 68456 1164
## 5 2084 STARK OH 0.034 367585 10811. 339421 25052 950
## 6 2993 DANE WI 0.073 367085 5029. 344617 10511 1201
## # ... with 19 more variables: popasian <int>, popother <int>, percwhite <dbl>,
## # percblack <dbl>, percamerindan <dbl>, percasian <dbl>, percother <dbl>,
## # popadults <int>, perchsd <dbl>, percollege <dbl>, percprof <dbl>,
## # poppovertyknown <int>, percpovertyknown <dbl>, percbelowpoverty <dbl>,
## # percchildbelowpovert <dbl>, percadultpoverty <dbl>,
## # percelderlypoverty <dbl>, inmetro <int>, category <chr>
# Labelled scatter plot with a linear-model trend line, plus a red outline
# drawn around the rows held in `midwest_select`.
# `TRUE` is spelled out because `T` is an ordinary variable that can be
# reassigned.
gg2 <- ggplot(data = midwest, aes(x = area, y = poptotal)) +
  geom_point(aes(col = state, size = popdensity)) +
  geom_smooth(method = "lm", se = TRUE) +
  # Encircle a subset of the data: this layer uses its own `data`.
  geom_encircle(
    aes(x = area, y = poptotal),
    data = midwest_select,
    col = "red",
    expand = 0.08,
    size = 2
  ) +
  # NOTE: xlim()/ylim() discard out-of-range observations before the
  # smoother is computed (ggplot2 warns about the removed rows).
  xlim(c(0, 0.1)) +
  ylim(c(0, 500000)) +
  labs(
    subtitle = "surface vs pop",
    y = "population",
    x = "surface",
    title = "Nuage de points de la population en fonction de la surface du comté",
    caption = "Source : midwest dataset"
  )
gg2
## `geom_smooth()` using formula 'y ~ x'
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

# Jitter plot : nuage de points spécial
# Load the `mpg` dataset and preview its first rows.
data(mpg)
head(mpg, n = 6L)
## # A tibble: 6 x 11
## manufacturer model displ year cyl trans drv cty hwy fl class
## <chr> <chr> <dbl> <int> <int> <chr> <chr> <int> <int> <chr> <chr>
## 1 audi a4 1.8 1999 4 auto(l5) f 18 29 p compa~
## 2 audi a4 1.8 1999 4 manual(m5) f 21 29 p compa~
## 3 audi a4 2 2008 4 manual(m6) f 20 31 p compa~
## 4 audi a4 2 2008 4 auto(av) f 21 30 p compa~
## 5 audi a4 2.8 1999 6 auto(l5) f 16 26 p compa~
## 6 audi a4 2.8 1999 6 manual(m5) f 18 26 p compa~
# Base plot object shared by the next two figures: `cty` on x, `hwy` on y.
g <- ggplot(mpg, aes(cty, hwy))
# Plain scatter plot with a linear-model trend line; the title describes
# the points as overlapping.
g +
  geom_point() +
  geom_smooth(method = "lm") +
  labs(
    title = "Nuage de points qui se chevauchent",
    subtitle = "City vs Hwy",
    caption = "Source : mpg dataset"
  )
## `geom_smooth()` using formula 'y ~ x'

# Jittered version: the points are randomly shifted so that they no longer
# sit on top of each other. `g` is overwritten with the layered plot.
g <- g +
  geom_jitter(size = 1, width = 0.5) +
  geom_smooth(method = "lm") +
  labs(
    title = "Kilométrage autoroute en fonction de kilométrage ville",
    subtitle = "City vs Hwy",
    caption = "Source : mpg dataset"
  )
# Render the jittered plot as an interactive plotly figure.
plotly::ggplotly(p = g)
## `geom_smooth()` using formula 'y ~ x'